In [7]:
# Render our plots inline
%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np # we'll need this for sqrt and mean
pd.set_option('display.mpl_style', 'default') # Make the graphs a bit prettier
plt.rcParams['figure.figsize'] = (15, 5)
In [8]:
NBA = pd.read_csv("NBA_train.csv")
Simply printing out the DataFrame will give us similar information to R's str()
:
In [38]:
NBA
Out[38]:
SeasonEnd
Team
Playoffs
W
PTS
oppPTS
FG
FGA
2P
2PA
...
FTA
ORB
DRB
AST
STL
BLK
TOV
PTSdiff
X2PA
X3PA
0
1980
Atlanta Hawks
1
50
8573
8334
3261
7027
3248
6952
...
2645
1369
2406
1913
782
539
1495
239
6952
75
1
1980
Boston Celtics
1
61
9303
8664
3617
7387
3455
6965
...
2449
1227
2457
2198
809
308
1539
639
6965
422
2
1980
Chicago Bulls
0
30
8813
9035
3362
6943
3292
6668
...
2592
1115
2465
2152
704
392
1684
-222
6668
275
3
1980
Cleveland Cavaliers
0
37
9360
9332
3811
8041
3775
7854
...
2205
1307
2381
2108
764
342
1370
28
7854
187
4
1980
Denver Nuggets
0
30
8878
9240
3462
7470
3379
7215
...
2539
1311
2524
2079
746
404
1533
-362
7215
255
5
1980
Detroit Pistons
0
16
8933
9609
3643
7596
3586
7377
...
2149
1226
2415
1950
783
562
1742
-676
7377
219
6
1980
Golden State Warriors
0
24
8493
8853
3527
7318
3500
7197
...
1914
1155
2437
2028
779
339
1492
-360
7197
121
7
1980
Houston Rockets
1
41
9084
9070
3599
7496
3495
7117
...
2326
1394
2217
2149
782
373
1565
14
7117
379
8
1980
Indiana Pacers
0
37
9119
9176
3639
7689
3551
7375
...
2333
1398
2326
2148
900
530
1517
-57
7375
314
9
1980
Kansas City Kings
1
47
8860
8603
3582
7489
3557
7375
...
2250
1187
2429
2123
863
356
1439
257
7375
114
10
1980
Los Angeles Lakers
1
60
9438
8954
3898
7368
3878
7268
...
2092
1085
2653
2413
774
546
1639
484
7268
100
11
1980
Milwaukee Bucks
1
49
9025
8702
3685
7553
3635
7398
...
2102
1245
2396
2277
778
510
1496
323
7398
155
12
1980
New Jersey Nets
0
34
8879
8975
3456
7504
3371
7206
...
2406
1229
2535
2094
869
581
1702
-96
7206
298
13
1980
New York Knicks
0
39
9344
9438
3802
7672
3760
7481
...
2274
1236
2303
2265
881
457
1613
-94
7481
191
14
1980
Philadelphia 76ers
1
59
8949
8603
3523
7156
3496
7031
...
2431
1187
2635
2226
792
652
1708
346
7031
125
15
1980
Phoenix Suns
1
55
9114
8819
3570
7235
3502
6955
...
2466
1071
2458
2283
908
344
1629
295
6955
280
16
1980
Portland Trail Blazers
1
38
8402
8469
3408
7167
3382
7035
...
2100
1295
2408
1898
708
472
1552
-67
7035
132
17
1980
San Antonio Spurs
1
41
9788
9819
3856
7738
3804
7532
...
2528
1153
2515
2326
771
333
1589
-31
7532
206
18
1980
San Diego Clippers
0
35
8820
9160
3524
7494
3347
6951
...
2167
1294
2308
1688
664
288
1443
-340
6951
543
19
1980
Seattle SuperSonics
1
56
8897
8515
3554
7565
3495
7376
...
2253
1380
2550
2043
750
428
1496
382
7376
189
20
1980
Utah Jazz
0
24
8394
8887
3382
6817
3323
6632
...
1943
967
2359
2005
656
362
1543
-493
6632
185
21
1980
Washington Bullets
1
39
8773
8982
3574
7796
3501
7558
...
2048
1334
2723
2201
530
443
1380
-209
7558
238
22
1981
Atlanta Hawks
0
31
8604
8858
3291
6866
3281
6784
...
2590
1201
2224
1846
749
469
1605
-254
6784
82
23
1981
Boston Celtics
1
62
9008
8526
3581
7099
3516
6858
...
2369
1155
2424
2202
683
594
1577
482
6858
241
24
1981
Chicago Bulls
1
45
8937
8775
3457
6903
3419
6724
...
2563
1227
2475
1925
729
514
1672
162
6724
179
25
1981
Cleveland Cavaliers
0
28
8670
9068
3556
7609
3484
7360
...
1909
1258
2243
2007
632
322
1396
-398
7360
249
26
1981
Dallas Mavericks
0
15
8322
9011
3204
6928
3158
6763
...
2487
1109
2177
1984
561
214
1439
-689
6763
165
27
1981
Denver Nuggets
0
37
9986
10025
3784
7960
3754
7815
...
3051
1325
2497
2030
720
380
1444
-39
7815
145
28
1981
Detroit Pistons
0
21
8174
8692
3236
6986
3223
6902
...
2330
1201
2111
1819
884
492
1759
-518
6902
84
29
1981
Golden State Warriors
0
39
9006
9103
3560
7284
3500
7074
...
2513
1403
2366
2026
611
301
1547
-97
7074
210
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
805
2011
Atlanta Hawks
1
44
7790
7857
2971
6429
2469
5002
...
1728
762
2460
1802
497
341
1118
-67
5002
1427
806
2011
Boston Celtics
1
56
7913
7473
3023
6219
2615
5100
...
1895
639
2542
1921
676
344
1195
440
5100
1119
807
2011
Charlotte Bobcats
0
34
7650
7978
2873
6365
2480
5162
...
1999
848
2444
1728
524
433
1192
-328
5162
1203
808
2011
Chicago Bulls
1
62
8087
7487
3042
6587
2531
5172
...
2008
967
2654
1827
592
468
1161
600
5172
1415
809
2011
Cleveland Cavaliers
0
19
7827
8566
2886
6647
2377
5158
...
2075
856
2449
1720
544
341
1166
-739
5158
1489
810
2011
Dallas Mavericks
1
57
8220
7873
3069
6463
2424
4695
...
1850
780
2618
1954
557
352
1145
347
4695
1768
811
2011
Denver Nuggets
1
50
8811
8421
3145
6613
2483
4909
...
2429
791
2652
1813
605
352
1157
390
4909
1704
812
2011
Detroit Pistons
0
30
7951
8246
3056
6647
2584
5391
...
1854
931
2236
1730
595
328
1067
-295
5391
1256
813
2011
Golden State Warriors
0
36
8477
8668
3251
7047
2566
5298
...
1695
955
2370
1847
737
406
1198
-191
5298
1749
814
2011
Houston Rockets
0
43
8685
8506
3170
6975
2493
5132
...
2083
962
2549
1955
581
371
1110
179
5132
1843
815
2011
Indiana Pacers
1
37
8183
8271
3003
6787
2418
5134
...
2035
914
2657
1611
584
456
1262
-88
5134
1653
816
2011
Los Angeles Clippers
0
32
8089
8346
3015
6594
2502
5075
...
2187
955
2501
1813
585
402
1343
-257
5075
1519
817
2011
Los Angeles Lakers
1
57
8321
7820
3128
6757
2604
5270
...
1979
989
2616
1801
602
422
1073
501
5270
1487
818
2011
Memphis Grizzlies
1
46
8195
8003
3200
6801
2891
5875
...
1981
970
2391
1691
771
441
1145
192
5875
926
819
2011
Miami Heat
1
58
8369
7757
3031
6301
2484
4822
...
2288
790
2666
1639
544
430
1142
612
4822
1479
820
2011
Milwaukee Bucks
0
35
7534
7603
2814
6544
2331
5130
...
1881
862
2480
1545
617
399
1103
-69
5130
1414
821
2011
Minnesota Timberwolves
0
17
8288
8832
3090
7014
2501
5449
...
1977
1085
2556
1650
592
422
1398
-544
5449
1565
822
2011
New Jersey Nets
0
24
7722
8234
2918
6638
2459
5301
...
1881
909
2440
1723
458
384
1152
-512
5301
1337
823
2011
New Orleans Hornets
1
46
7784
7711
2944
6416
2500
5184
...
1897
824
2468
1691
624
359
1069
73
5184
1232
824
2011
New York Knicks
1
42
8734
8670
3140
6867
2375
4786
...
2087
847
2470
1757
625
475
1123
64
4786
2081
825
2011
Oklahoma City Thunder
1
55
8596
8285
3066
6609
2579
5206
...
2401
903
2604
1672
654
487
1156
311
5206
1403
826
2011
Orlando Magic
1
52
8135
7687
2956
6411
2186
4308
...
2101
864
2679
1636
548
384
1224
448
4308
2103
827
2011
Philadelphia 76ers
1
41
8119
7996
3125
6776
2682
5528
...
1851
850
2578
1861
621
355
1063
123
5528
1248
828
2011
Phoenix Suns
0
40
8611
8684
3219
6844
2518
4987
...
1939
821
2478
1945
545
357
1169
-73
4987
1857
829
2011
Portland Trail Blazers
1
48
7896
7771
2951
6599
2433
5096
...
1835
996
2230
1736
660
358
1070
125
5096
1503
830
2011
Sacramento Kings
0
24
8151
8589
3134
6979
2706
5702
...
1981
1071
2526
1675
608
391
1324
-438
5702
1277
831
2011
San Antonio Spurs
1
61
8502
8034
3148
6628
2463
4901
...
1984
829
2603
1836
602
372
1101
468
4901
1727
832
2011
Toronto Raptors
0
22
8124
8639
3144
6755
2799
5664
...
1976
963
2343
1795
581
350
1206
-515
5664
1091
833
2011
Utah Jazz
0
39
8153
8303
3064
6590
2629
5334
...
2061
898
2338
1921
629
484
1175
-150
5334
1256
834
2011
Washington Wizards
0
23
7977
8584
3048
6888
2656
5706
...
1999
1013
2374
1592
665
502
1258
-607
5706
1182
835 rows × 23 columns
And for the summary we'll use an equivalent method, DataFrame.describe()
:
In [37]:
NBA.describe()
Out[37]:
SeasonEnd
Playoffs
W
PTS
oppPTS
FG
FGA
2P
2PA
3P
...
FTA
ORB
DRB
AST
STL
BLK
TOV
PTSdiff
X2PA
X3PA
count
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
...
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
835.000000
mean
1996.319760
0.574850
41.000000
8370.239521
8370.239521
3200.367665
6873.318563
2881.324551
5956.444311
319.043114
...
2189.953293
1061.584431
2427.354491
1912.112575
668.364072
419.805988
1302.837126
0.000000
5956.444311
916.874251
std
9.243808
0.494662
12.740822
581.040114
587.543959
287.181266
401.027166
446.097941
830.596327
199.698941
...
244.491086
150.224519
130.671523
221.610925
93.393044
82.274913
153.973470
379.547673
830.596327
523.982964
min
1980.000000
0.000000
11.000000
6901.000000
6909.000000
2565.000000
5972.000000
1981.000000
4153.000000
10.000000
...
1475.000000
639.000000
2044.000000
1423.000000
455.000000
204.000000
931.000000
-1246.000000
4153.000000
75.000000
25%
1989.000000
0.000000
31.000000
7934.000000
7934.000000
2974.000000
6563.500000
2510.000000
5269.000000
131.500000
...
2008.000000
953.500000
2346.500000
1735.000000
599.000000
359.000000
1192.000000
-268.000000
5269.000000
413.000000
50%
1996.000000
1.000000
42.000000
8312.000000
8365.000000
3150.000000
6831.000000
2718.000000
5706.000000
329.000000
...
2176.000000
1055.000000
2433.000000
1899.000000
658.000000
410.000000
1289.000000
21.000000
5706.000000
942.000000
75%
2005.000000
1.000000
50.500000
8784.500000
8768.500000
3434.500000
7157.000000
3296.000000
6753.500000
481.500000
...
2352.000000
1167.000000
2516.500000
2077.500000
729.000000
469.500000
1395.500000
287.500000
6753.500000
1347.500000
max
2011.000000
1.000000
72.000000
10371.000000
10723.000000
3980.000000
8868.000000
3954.000000
7873.000000
841.000000
...
3051.000000
1520.000000
2753.000000
2575.000000
1053.000000
716.000000
1873.000000
1004.000000
7873.000000
2284.000000
8 rows × 22 columns
Now we want to create a table with Playoffs and Wins. In R, the command is table(NBA$W, NBA$Playoffs)
.
In [10]:
NBA[['Playoffs', 'W']].groupby('W').aggregate(sum)
Out[10]:
Playoffs
W
11
0
12
0
13
0
14
0
15
0
16
0
17
0
18
0
19
0
20
0
21
0
22
0
23
0
24
0
25
0
26
0
27
0
28
0
29
0
30
1
31
1
32
0
33
0
34
0
35
3
36
4
37
4
38
7
39
10
40
13
41
26
42
29
43
18
44
27
45
22
46
15
47
28
48
14
49
17
50
32
51
12
52
20
53
17
54
18
55
24
56
16
57
23
58
13
59
14
60
8
61
10
62
13
63
7
64
3
65
3
66
2
67
4
69
1
72
1
Create a new column in NBA, which contains the difference between points scored and lost. (R: NBA$PTSdiff = NBA$PTS - NBA$oppPTS
)
In [11]:
NBA['PTSdiff'] = NBA['PTS'] - NBA['oppPTS']
In [12]:
NBA
Out[12]:
SeasonEnd
Team
Playoffs
W
PTS
oppPTS
FG
FGA
2P
2PA
...
3PA
FT
FTA
ORB
DRB
AST
STL
BLK
TOV
PTSdiff
0
1980
Atlanta Hawks
1
50
8573
8334
3261
7027
3248
6952
...
75
2038
2645
1369
2406
1913
782
539
1495
239
1
1980
Boston Celtics
1
61
9303
8664
3617
7387
3455
6965
...
422
1907
2449
1227
2457
2198
809
308
1539
639
2
1980
Chicago Bulls
0
30
8813
9035
3362
6943
3292
6668
...
275
2019
2592
1115
2465
2152
704
392
1684
-222
3
1980
Cleveland Cavaliers
0
37
9360
9332
3811
8041
3775
7854
...
187
1702
2205
1307
2381
2108
764
342
1370
28
4
1980
Denver Nuggets
0
30
8878
9240
3462
7470
3379
7215
...
255
1871
2539
1311
2524
2079
746
404
1533
-362
5
1980
Detroit Pistons
0
16
8933
9609
3643
7596
3586
7377
...
219
1590
2149
1226
2415
1950
783
562
1742
-676
6
1980
Golden State Warriors
0
24
8493
8853
3527
7318
3500
7197
...
121
1412
1914
1155
2437
2028
779
339
1492
-360
7
1980
Houston Rockets
1
41
9084
9070
3599
7496
3495
7117
...
379
1782
2326
1394
2217
2149
782
373
1565
14
8
1980
Indiana Pacers
0
37
9119
9176
3639
7689
3551
7375
...
314
1753
2333
1398
2326
2148
900
530
1517
-57
9
1980
Kansas City Kings
1
47
8860
8603
3582
7489
3557
7375
...
114
1671
2250
1187
2429
2123
863
356
1439
257
10
1980
Los Angeles Lakers
1
60
9438
8954
3898
7368
3878
7268
...
100
1622
2092
1085
2653
2413
774
546
1639
484
11
1980
Milwaukee Bucks
1
49
9025
8702
3685
7553
3635
7398
...
155
1605
2102
1245
2396
2277
778
510
1496
323
12
1980
New Jersey Nets
0
34
8879
8975
3456
7504
3371
7206
...
298
1882
2406
1229
2535
2094
869
581
1702
-96
13
1980
New York Knicks
0
39
9344
9438
3802
7672
3760
7481
...
191
1698
2274
1236
2303
2265
881
457
1613
-94
14
1980
Philadelphia 76ers
1
59
8949
8603
3523
7156
3496
7031
...
125
1876
2431
1187
2635
2226
792
652
1708
346
15
1980
Phoenix Suns
1
55
9114
8819
3570
7235
3502
6955
...
280
1906
2466
1071
2458
2283
908
344
1629
295
16
1980
Portland Trail Blazers
1
38
8402
8469
3408
7167
3382
7035
...
132
1560
2100
1295
2408
1898
708
472
1552
-67
17
1980
San Antonio Spurs
1
41
9788
9819
3856
7738
3804
7532
...
206
2024
2528
1153
2515
2326
771
333
1589
-31
18
1980
San Diego Clippers
0
35
8820
9160
3524
7494
3347
6951
...
543
1595
2167
1294
2308
1688
664
288
1443
-340
19
1980
Seattle SuperSonics
1
56
8897
8515
3554
7565
3495
7376
...
189
1730
2253
1380
2550
2043
750
428
1496
382
20
1980
Utah Jazz
0
24
8394
8887
3382
6817
3323
6632
...
185
1571
1943
967
2359
2005
656
362
1543
-493
21
1980
Washington Bullets
1
39
8773
8982
3574
7796
3501
7558
...
238
1552
2048
1334
2723
2201
530
443
1380
-209
22
1981
Atlanta Hawks
0
31
8604
8858
3291
6866
3281
6784
...
82
2012
2590
1201
2224
1846
749
469
1605
-254
23
1981
Boston Celtics
1
62
9008
8526
3581
7099
3516
6858
...
241
1781
2369
1155
2424
2202
683
594
1577
482
24
1981
Chicago Bulls
1
45
8937
8775
3457
6903
3419
6724
...
179
1985
2563
1227
2475
1925
729
514
1672
162
25
1981
Cleveland Cavaliers
0
28
8670
9068
3556
7609
3484
7360
...
249
1486
1909
1258
2243
2007
632
322
1396
-398
26
1981
Dallas Mavericks
0
15
8322
9011
3204
6928
3158
6763
...
165
1868
2487
1109
2177
1984
561
214
1439
-689
27
1981
Denver Nuggets
0
37
9986
10025
3784
7960
3754
7815
...
145
2388
3051
1325
2497
2030
720
380
1444
-39
28
1981
Detroit Pistons
0
21
8174
8692
3236
6986
3223
6902
...
84
1689
2330
1201
2111
1819
884
492
1759
-518
29
1981
Golden State Warriors
0
39
9006
9103
3560
7284
3500
7074
...
210
1826
2513
1403
2366
2026
611
301
1547
-97
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
805
2011
Atlanta Hawks
1
44
7790
7857
2971
6429
2469
5002
...
1427
1346
1728
762
2460
1802
497
341
1118
-67
806
2011
Boston Celtics
1
56
7913
7473
3023
6219
2615
5100
...
1119
1459
1895
639
2542
1921
676
344
1195
440
807
2011
Charlotte Bobcats
0
34
7650
7978
2873
6365
2480
5162
...
1203
1511
1999
848
2444
1728
524
433
1192
-328
808
2011
Chicago Bulls
1
62
8087
7487
3042
6587
2531
5172
...
1415
1492
2008
967
2654
1827
592
468
1161
600
809
2011
Cleveland Cavaliers
0
19
7827
8566
2886
6647
2377
5158
...
1489
1546
2075
856
2449
1720
544
341
1166
-739
810
2011
Dallas Mavericks
1
57
8220
7873
3069
6463
2424
4695
...
1768
1437
1850
780
2618
1954
557
352
1145
347
811
2011
Denver Nuggets
1
50
8811
8421
3145
6613
2483
4909
...
1704
1859
2429
791
2652
1813
605
352
1157
390
812
2011
Detroit Pistons
0
30
7951
8246
3056
6647
2584
5391
...
1256
1367
1854
931
2236
1730
595
328
1067
-295
813
2011
Golden State Warriors
0
36
8477
8668
3251
7047
2566
5298
...
1749
1290
1695
955
2370
1847
737
406
1198
-191
814
2011
Houston Rockets
0
43
8685
8506
3170
6975
2493
5132
...
1843
1668
2083
962
2549
1955
581
371
1110
179
815
2011
Indiana Pacers
1
37
8183
8271
3003
6787
2418
5134
...
1653
1592
2035
914
2657
1611
584
456
1262
-88
816
2011
Los Angeles Clippers
0
32
8089
8346
3015
6594
2502
5075
...
1519
1546
2187
955
2501
1813
585
402
1343
-257
817
2011
Los Angeles Lakers
1
57
8321
7820
3128
6757
2604
5270
...
1487
1541
1979
989
2616
1801
602
422
1073
501
818
2011
Memphis Grizzlies
1
46
8195
8003
3200
6801
2891
5875
...
926
1486
1981
970
2391
1691
771
441
1145
192
819
2011
Miami Heat
1
58
8369
7757
3031
6301
2484
4822
...
1479
1760
2288
790
2666
1639
544
430
1142
612
820
2011
Milwaukee Bucks
0
35
7534
7603
2814
6544
2331
5130
...
1414
1423
1881
862
2480
1545
617
399
1103
-69
821
2011
Minnesota Timberwolves
0
17
8288
8832
3090
7014
2501
5449
...
1565
1519
1977
1085
2556
1650
592
422
1398
-544
822
2011
New Jersey Nets
0
24
7722
8234
2918
6638
2459
5301
...
1337
1427
1881
909
2440
1723
458
384
1152
-512
823
2011
New Orleans Hornets
1
46
7784
7711
2944
6416
2500
5184
...
1232
1452
1897
824
2468
1691
624
359
1069
73
824
2011
New York Knicks
1
42
8734
8670
3140
6867
2375
4786
...
2081
1689
2087
847
2470
1757
625
475
1123
64
825
2011
Oklahoma City Thunder
1
55
8596
8285
3066
6609
2579
5206
...
1403
1977
2401
903
2604
1672
654
487
1156
311
826
2011
Orlando Magic
1
52
8135
7687
2956
6411
2186
4308
...
2103
1453
2101
864
2679
1636
548
384
1224
448
827
2011
Philadelphia 76ers
1
41
8119
7996
3125
6776
2682
5528
...
1248
1426
1851
850
2578
1861
621
355
1063
123
828
2011
Phoenix Suns
0
40
8611
8684
3219
6844
2518
4987
...
1857
1472
1939
821
2478
1945
545
357
1169
-73
829
2011
Portland Trail Blazers
1
48
7896
7771
2951
6599
2433
5096
...
1503
1476
1835
996
2230
1736
660
358
1070
125
830
2011
Sacramento Kings
0
24
8151
8589
3134
6979
2706
5702
...
1277
1455
1981
1071
2526
1675
608
391
1324
-438
831
2011
San Antonio Spurs
1
61
8502
8034
3148
6628
2463
4901
...
1727
1521
1984
829
2603
1836
602
372
1101
468
832
2011
Toronto Raptors
0
22
8124
8639
3144
6755
2799
5664
...
1091
1491
1976
963
2343
1795
581
350
1206
-515
833
2011
Utah Jazz
0
39
8153
8303
3064
6590
2629
5334
...
1256
1590
2061
898
2338
1921
629
484
1175
-150
834
2011
Washington Wizards
0
23
7977
8584
3048
6888
2656
5706
...
1182
1489
1999
1013
2374
1592
665
502
1258
-607
835 rows × 21 columns
Now we plot Wins as a function of points difference to get an idea whether there is a corellation.
In [13]:
NBA.plot(x='PTSdiff', y='W', kind='scatter')
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fae1c16e160>
To build a linear regression model, I followed this blog post. We need to import LinearRegression from scikit-learn:
In [14]:
from sklearn.linear_model import LinearRegression
And build the model:
In [15]:
lr = LinearRegression()
lr.fit(NBA[['PTSdiff']], NBA[['W']])
Out[15]:
LinearRegression(copy_X=True, fit_intercept=True, normalize=False)
I didn't find a method that would print a nice R-like summary of the model in python, so we need to print them manually:
In [16]:
print(lr.score(NBA[['PTSdiff']], NBA[['W']])) # R^2
print(lr.intercept_)
print(lr.coef_)
0.94234248197
[ 41.]
[[ 0.03258633]]
I've just found a method in pandas that prints out a summary!
In [17]:
from pandas.stats.api import ols
model = ols(x=NBA['PTSdiff'], y=NBA['W'])
In [18]:
model
Out[18]:
-------------------------Summary of Regression Analysis-------------------------
Formula: Y ~ <x> + <intercept>
Number of Observations: 835
Number of Degrees of Freedom: 2
R-squared: 0.9423
Adj R-squared: 0.9423
Rmse: 3.0612
F-stat (1, 833): 13614.3787, p-value: 0.0000
Degrees of Freedom: model 1, resid 833
-----------------------Summary of Estimated Coefficients------------------------
Variable Coef Std Err t-stat p-value CI 2.5% CI 97.5%
--------------------------------------------------------------------------------
x 0.0326 0.0003 116.68 0.0000 0.0320 0.0331
intercept 41.0000 0.1059 387.03 0.0000 40.7924 41.2076
---------------------------------End of Summary---------------------------------
There are several ways of creating a multidimensional model, but I like this one: link.
First, we need to convert column names like in R --- otherwise the methods will throw an error
In [19]:
NBA['X2PA'] = NBA['2PA']
NBA['X3PA'] = NBA['3PA']
In [20]:
import statsmodels.formula.api as sm
model = sm.ols(formula="PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + TOV + STL + BLK", data=NBA).fit()
In [21]:
model.summary()
Out[21]:
OLS Regression Results
Dep. Variable: PTS R-squared: 0.899
Model: OLS Adj. R-squared: 0.898
Method: Least Squares F-statistic: 817.3
Date: Thu, 19 Mar 2015 Prob (F-statistic): 0.00
Time: 21:18:08 Log-Likelihood: -5541.1
No. Observations: 835 AIC: 1.110e+04
Df Residuals: 825 BIC: 1.115e+04
Df Model: 9
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -2050.8108 203.487 -10.078 0.000 -2450.223 -1651.398
X2PA 1.0429 0.030 35.274 0.000 0.985 1.101
X3PA 1.2586 0.038 32.747 0.000 1.183 1.334
FTA 1.1280 0.034 33.440 0.000 1.062 1.194
AST 0.8858 0.044 20.150 0.000 0.799 0.972
ORB -0.9554 0.078 -12.261 0.000 -1.108 -0.802
DRB 0.0388 0.062 0.631 0.528 -0.082 0.160
TOV -0.0248 0.061 -0.405 0.686 -0.145 0.095
STL -0.1992 0.092 -2.169 0.030 -0.379 -0.019
BLK -0.0558 0.088 -0.635 0.526 -0.228 0.117
Omnibus: 1.265 Durbin-Watson: 1.820
Prob(Omnibus): 0.531 Jarque-Bera (JB): 1.301
Skew: -0.094 Prob(JB): 0.522
Kurtosis: 2.951 Cond. No. 2.34e+05
Let's print the residuals:
In [22]:
model.resid
Out[22]:
0 38.572271
1 142.872004
2 -92.895718
3 -8.391347
4 -258.470561
5 171.460833
6 150.408162
7 169.381143
8 40.775620
9 -75.325661
10 444.908874
11 94.386470
12 -205.680905
13 113.596904
14 64.199400
...
820 -135.417211
821 108.267709
822 -171.341020
823 102.443908
824 156.082920
825 210.052169
826 109.490894
827 -20.535417
828 59.284572
829 175.923527
830 30.653182
831 262.672801
832 70.067186
833 -17.578942
834 -8.339305
Length: 835, dtype: float64
In [23]:
SSE = sum(model.resid**2)
SSE
Out[23]:
28394313.994756646
In [24]:
RMSE = np.sqrt(SSE/len(NBA))
RMSE
Out[24]:
184.40489814749066
In [25]:
np.mean(NBA['PTS'])
Out[25]:
8370.2395209580845
In [26]:
model1 = sm.ols(formula="PTS ~ X2PA + X3PA + FTA + AST + ORB + DRB + STL + BLK", data=NBA).fit()
model1.summary()
Out[26]:
OLS Regression Results
Dep. Variable: PTS R-squared: 0.899
Model: OLS Adj. R-squared: 0.898
Method: Least Squares F-statistic: 920.4
Date: Thu, 19 Mar 2015 Prob (F-statistic): 0.00
Time: 21:18:08 Log-Likelihood: -5541.2
No. Observations: 835 AIC: 1.110e+04
Df Residuals: 826 BIC: 1.114e+04
Df Model: 8
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -2076.6782 193.084 -10.755 0.000 -2455.672 -1697.684
X2PA 1.0435 0.030 35.366 0.000 0.986 1.101
X3PA 1.2627 0.037 34.099 0.000 1.190 1.335
FTA 1.1254 0.033 34.023 0.000 1.060 1.190
AST 0.8861 0.044 20.173 0.000 0.800 0.972
ORB -0.9582 0.078 -12.350 0.000 -1.110 -0.806
DRB 0.0389 0.062 0.632 0.527 -0.082 0.160
STL -0.2068 0.090 -2.301 0.022 -0.383 -0.030
BLK -0.0586 0.087 -0.670 0.503 -0.230 0.113
Omnibus: 1.258 Durbin-Watson: 1.824
Prob(Omnibus): 0.533 Jarque-Bera (JB): 1.296
Skew: -0.093 Prob(JB): 0.523
Kurtosis: 2.950 Cond. No. 2.19e+05
In [27]:
model2 = sm.ols(formula="PTS ~ X2PA + X3PA + FTA + AST + ORB + STL + BLK", data=NBA).fit()
model2.summary()
Out[27]:
OLS Regression Results
Dep. Variable: PTS R-squared: 0.899
Model: OLS Adj. R-squared: 0.898
Method: Least Squares F-statistic: 1053.
Date: Thu, 19 Mar 2015 Prob (F-statistic): 0.00
Time: 21:18:08 Log-Likelihood: -5541.4
No. Observations: 835 AIC: 1.110e+04
Df Residuals: 827 BIC: 1.114e+04
Df Model: 7
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -2015.4630 167.009 -12.068 0.000 -2343.274 -1687.652
X2PA 1.0483 0.029 36.753 0.000 0.992 1.104
X3PA 1.2708 0.035 36.568 0.000 1.203 1.339
FTA 1.1285 0.033 34.506 0.000 1.064 1.193
AST 0.8909 0.043 20.597 0.000 0.806 0.976
ORB -0.9702 0.075 -12.903 0.000 -1.118 -0.823
STL -0.2276 0.084 -2.724 0.007 -0.392 -0.064
BLK -0.0388 0.082 -0.475 0.635 -0.199 0.121
Omnibus: 1.168 Durbin-Watson: 1.834
Prob(Omnibus): 0.558 Jarque-Bera (JB): 1.208
Skew: -0.090 Prob(JB): 0.546
Kurtosis: 2.948 Cond. No. 1.79e+05
In [28]:
model3 = sm.ols(formula="PTS ~ X2PA + X3PA + FTA + AST + ORB + STL", data=NBA).fit()
model3.summary()
Out[28]:
OLS Regression Results
Dep. Variable: PTS R-squared: 0.899
Model: OLS Adj. R-squared: 0.898
Method: Least Squares F-statistic: 1229.
Date: Thu, 19 Mar 2015 Prob (F-statistic): 0.00
Time: 21:18:08 Log-Likelihood: -5541.5
No. Observations: 835 AIC: 1.110e+04
Df Residuals: 828 BIC: 1.113e+04
Df Model: 6
coef std err t P>|t| [95.0% Conf. Int.]
Intercept -2032.7164 162.942 -12.475 0.000 -2352.544 -1712.889
X2PA 1.0500 0.028 37.117 0.000 0.994 1.105
X3PA 1.2731 0.034 37.001 0.000 1.206 1.341
FTA 1.1273 0.033 34.581 0.000 1.063 1.191
AST 0.8884 0.043 20.701 0.000 0.804 0.973
ORB -0.9743 0.075 -13.051 0.000 -1.121 -0.828
STL -0.2268 0.084 -2.717 0.007 -0.391 -0.063
Omnibus: 1.174 Durbin-Watson: 1.834
Prob(Omnibus): 0.556 Jarque-Bera (JB): 1.222
Skew: -0.089 Prob(JB): 0.543
Kurtosis: 2.942 Cond. No. 1.74e+05
In [29]:
SSE3 = sum(model3.resid**2)
SSE3
Out[29]:
28421464.862623505
In [30]:
RMSE3 = np.sqrt(SSE3/len(NBA))
RMSE3
Out[30]:
184.49304179347197
In [31]:
NBA_test = pd.read_csv("NBA_test.csv")
NBA_test['X2PA'] = NBA_test['2PA']
NBA_test['X3PA'] = NBA_test['3PA']
In [32]:
prediction = model3.predict(NBA_test)
In [33]:
SSE_pred = sum((prediction-NBA_test['PTS'])**2)
In [34]:
SST_pred = sum((np.mean(NBA['PTS']) - NBA_test['PTS'])**2)
In [35]:
R2 = 1-SSE_pred/SST_pred
R2
Out[35]:
0.81271418527713002
In [36]:
RMSE_pred = np.sqrt(SSE_pred/len(NBA_test))
RMSE_pred
Out[36]:
196.3723439642647
In [ ]:
Content source: alkamid/The-Analytics-Edge-in-IPython
Similar notebooks: